# ------------------------------------------------------
# Political Ambition and Opposition Legislative Review:
# Bill Scrutiny as an Intra-Party Signaling Device
# Lion Behrens, Dominic Nyhuis, Thomas Gschwend
# ------------------------------------------------------

library(stringr)         # for string manipulation
library(lme4)            # for multilevel modeling
library(mclogit)         # for multilevel multinomial logistic regressions
library(qdapRegex)       # to remove text between strings
library(stargazer)       # formatted regression tables
library(ClusterBootstrap)# for bootstrapped cluster-corrected SEs
library(dplyr)           # for data manipulation
library(mlogit)          # for multinomial logistic regression
library(MASS)            # to draw from multivariate normal
library(ggplot2)         # for data visualization
library(tikzDevice)      # to export plots
library(sandwich)        # for cluster-corrected standard errors
library(sjstats)         # to calculate intra class correlations
library(MuMIn)           # to calculate R2 from mixed models
library(readstata13)     # to read in .dta files
library(dplyr)

# cluster-robust standard errors for multinomial logistic regression
cl.mlogit   <- function(fm, cluster){
  
  # fm: a fitted mlogit model
  # cluster: a data vector with the cluster
  #          identity of each observation in fm
  
  require(sandwich, quietly = TRUE)
  require(lmtest, quietly = TRUE)
  M <- length(unique(cluster))
  N <- length(cluster)
  K <- length(coefficients(fm))
  # edit 6/22/2015: change dfc
  # dfc <- (M/(M-1))*((N-1)/(N-K))
  dfc <- (M/(M-1))
  uj  <- apply(estfun(fm),2, function(x) tapply(x, cluster, sum));
  vcovCL <- dfc*sandwich(fm, meat.=crossprod(uj)/N)
  coeftest(fm, vcovCL) 
}

# load data
load("bawue_1416.RData")
mps <- read.csv2("bawue_mps_new.csv", encoding = "UTF-8")
colnames(mps)[1] <- "mp_name"
mps_smdata <- read.csv2("bawue_mps_socialmedia.csv")
function_17 <- read.csv2("function_17.csv")
mps$function_17 <- function_17$function_17

# exclusion of four laws that didn't make it into committee at time of scraping
legistext <- legistext[-which(legistext$idDrucksache == "16/5113" |
                                legistext$idDrucksache == "16/5311" |
                                legistext$idDrucksache == "16/5421" |
                                legistext$idDrucksache == "16/5422"),]

# --------------------------------------
# 1. construct data on bill x MP level
# --------------------------------------

  # --------------------------------------
  # 1.1 data preparation and compilation
  # --------------------------------------

    # subset legistext variables
    legistext <- legistext[,c(1, 3:4, 12:15, 16:17, 18:24, 31:34, 38)]
  
    # sort legislative period-specific variables in MP data
    vars_14 <- c(1:7, 18, which(str_detect(colnames(mps), "14")))
    vars_15 <- c(1:7, 18, which(str_detect(colnames(mps), "15")))
    vars_16 <- c(1:7, 18, which(str_detect(colnames(mps), "16")))
  
    # duplicate bills as many times as MPs were present in legislative period
    wp14 <- which(substr(legistext$idDrucksache, 1, 2) == "14")
    bill_mp14 <- legistext[rep(wp14, each = length(which(mps$LP_14==1))),]
    rownames(bill_mp14) <- 1:nrow(bill_mp14)
    bill_mp14 <- cbind(bill_mp14, mps[mps$LP_14==1, vars_14]) 
    bill_mp14$function_14 <- paste(sapply(bill_mp14$function_14_1, "[[", 1), 
                                   sapply(bill_mp14$function_14_2, "[[", 1), 
                                   sep = " SEP ")
    bill_mp14 <- bill_mp14[,-c(37, 38)]
    for (row in 1: nrow(bill_mp14)) {
      bill_mp14$function_14[row] <- 
        ifelse(as.Date(bill_mp14$datestemp[row],"%d.%m.%Y") < as.Date("10.02.2010","%d.%m.%Y"),
               sub(" SEP.*", "", bill_mp14$function_14[row]),
               sub(".*SEP ", "", bill_mp14$function_14[row])
        )
      }
    
    wp15 <- which(substr(legistext$idDrucksache, 1, 2) == "15")
    bill_mp15 <- legistext[rep(wp15, each = length(which(mps$LP_15==1))),]
    rownames(bill_mp15) <- 1:nrow(bill_mp15)
    bill_mp15 <- cbind(bill_mp15, mps[mps$LP_15==1, vars_15]) 
    
    wp16 <- which(substr(legistext$idDrucksache, 1, 2) == "16")
    bill_mp16 <- legistext[rep(wp16, each = length(which(mps$LP_16==1))),]
    rownames(bill_mp16) <- 1:nrow(bill_mp16)
    bill_mp16 <- cbind(bill_mp16, mps[mps$LP_16==1, vars_16]) 
    
    # bind datasets together
    colnames(bill_mp14)[30:51] <- substr(colnames(bill_mp14)[30:51], 1, nchar(colnames(bill_mp14)[30:51])-3)
    colnames(bill_mp15)[30:51] <- substr(colnames(bill_mp15)[30:51], 1, nchar(colnames(bill_mp15)[30:51])-3)
    colnames(bill_mp16)[30:51] <- substr(colnames(bill_mp16)[30:51], 1, nchar(colnames(bill_mp16)[30:51])-3)
    colnames(bill_mp15)[47] <- "co.partisan_MP_of_same.district"
    colnames(bill_mp16)[49] <- "urban.district"
    
    billxmp <- rbind(bill_mp14, bill_mp15, bill_mp16)
    rownames(billxmp) <- 1:nrow(billxmp)
    colnames(billxmp)[51] <- "function_highest"
    
    # delete MPs that left parliament prior to introduction of bill
    left_early <- which(!as.Date(billxmp$datestemp, "%d.%m.%Y") < as.Date(billxmp$ausgeschieden_am, "%d.%m.%Y"))
    billxmp <- billxmp[-left_early,]
    rownames(billxmp) <- 1:nrow(billxmp)
    
    # delete MPs that joined after introduction of bill
    came_late <- which(as.Date(billxmp$datestemp, "%d.%m.%Y") < as.Date(billxmp$eingetreten_am, "%d.%m.%Y"))
    billxmp <- billxmp[-came_late,]
    rownames(billxmp) <- 1:nrow(billxmp)
  
  
  # --------------------------------------
  # 1.2 dependent variables
  # --------------------------------------
    
    billxmp$idDrucksache <- gsub(" ", "", billxmp$idDrucksache)
    amendmentprop$idDrucksacheLegis <- gsub(" ", "", amendmentprop$idDrucksacheLegis)
    billxmp$amend <- 0 # binary indicator (amendment vs. no amendment)
    billxmp$n_articles <- 0 # number of amended articles
    billxmp$worddist <- 0 # raw Levenshtein distance
    billxmp$worddist_weighted <- 0 # continuous constructiveness indicator
    
    for (i in 1: nrow(billxmp)) {
      
      # identify amendmentprops for specific bill that MP contributed to
      
        ### if there was no amendmentprop for law, next
        amend_law <- amendmentprop[which(amendmentprop$idDrucksacheLegis == billxmp$idDrucksache[i]),]
        if (nrow(amend_law) == 0) 
          next
        
        ### if MP didn't contribute to any amendmentprop for law, next
        ids <- which(str_detect(amend_law$members, str_c(as.character(billxmp$last.name[i]), "\\b")))
        if (length(ids) == 0) 
          next
  
      # fill up variables
      billxmp$amend[i] <- 1
      billxmp$n_articles[i] <- length(unlist(unique(amend_law$n_articles[ids])))
      billxmp$worddist[i] <- sum(amend_law$worddist[ids])
      billxmp$worddist_weighted[i] <- sum(amend_law$worddist_weighted[ids])
      
    }
    
    
    # manually correct entries for MPs that share their last name 
    double_names <- data.frame(table(mps$last.name))
    double_names <- double_names[double_names$Freq > 1,]
    
      # MP: Kern, FDP
      billxmp[which(billxmp$last.name=="Kern" & billxmp$partei=="FDP/DVP" & billxmp$idDrucksache=="15/4325"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Kern" & billxmp$partei=="FDP/DVP" & billxmp$idDrucksache=="15/4352"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Kern" & billxmp$partei=="FDP/DVP" & billxmp$idDrucksache=="15/7554"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Kern" & billxmp$partei=="FDP/DVP" & billxmp$idDrucksache=="16/2230"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Kern" & billxmp$partei=="FDP/DVP" & billxmp$idDrucksache=="16/3248"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Kern" & billxmp$partei=="FDP/DVP" & billxmp$idDrucksache=="16/5111"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      
      # MP: Kern, Gruene
      billxmp[which(billxmp$last.name=="Kern" & billxmp$partei=="Gruene" & billxmp$idDrucksache=="15/5259"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Kern" & billxmp$partei=="Gruene" & billxmp$idDrucksache=="15/6961"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Kern" & billxmp$partei=="Gruene" & billxmp$idDrucksache=="15/7134"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Kern" & billxmp$partei=="Gruene" & billxmp$idDrucksache=="15/7957"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Kern" & billxmp$partei=="Gruene" & billxmp$idDrucksache=="16/1749"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Kern" & billxmp$partei=="Gruene" & billxmp$idDrucksache=="16/3855"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      
      # MP: Schmid, SPD
      billxmp[which(billxmp$last.name=="Schmid" & billxmp$partei=="SPD" & billxmp$idDrucksache=="15/7134"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      
      # MP: Schwarz, Andrea (f), Gruene
      billxmp[which(billxmp$last.name=="Schwarz" & billxmp$first.name=="Andrea" & billxmp$partei=="Gruene" & billxmp$idDrucksache=="16/1617"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Schwarz" & billxmp$first.name=="Andrea" & billxmp$partei=="Gruene" & billxmp$idDrucksache=="16/2231"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Schwarz" & billxmp$first.name=="Andrea" & billxmp$partei=="Gruene" & billxmp$idDrucksache=="16/2740"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Schwarz" & billxmp$first.name=="Andrea" & billxmp$partei=="Gruene" & billxmp$idDrucksache=="16/2741"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      billxmp[which(billxmp$last.name=="Schwarz" & billxmp$first.name=="Andrea" & billxmp$partei=="Gruene" & billxmp$idDrucksache=="16/5111"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0
      
      # MP: Schwarz, Andreas (m), Gruene
      billxmp[which(billxmp$last.name=="Schwarz" & billxmp$first.name=="Andreas" & billxmp$partei=="Gruene" & billxmp$idDrucksache=="16/334"), 
              c("amend", "n_articles", "worddist", "worddist_weighted")] <- 0

      
      
  # ----------------------------------
  # 1.3 independent variables
  # ----------------------------------
  
    # ---------------------------------
    # 1.3.1 contextual variables
    # ---------------------------------
    
      # time until next election (number of months)
      billxmp$datedist <- NA
    
      billxmp$datedist[substr(billxmp$idDrucksache, 1, 2) == "14"] <- as.Date("2011-03-27") - as.Date(billxmp$datestemp[substr(billxmp$idDrucksache, 1, 2) == "14"], format="%d.%m.%Y")
      billxmp$datedist[substr(billxmp$idDrucksache, 1, 2) == "15"] <- as.Date("2016-03-13") - as.Date(as.character(billxmp$datestemp[substr(billxmp$idDrucksache, 1, 2) == "15"]), format="%d.%m.%Y")
      billxmp$datedist[substr(billxmp$idDrucksache, 1, 2) == "16"] <- as.Date("2021-03-14") - as.Date(as.character(billxmp$datestemp[substr(billxmp$idDrucksache, 1, 2) == "16"]), format="%d.%m.%Y") 
  
      billxmp$datedist <- billxmp$datedist/30
      
      # size of faction
      billxmp$factionsize <- NA
      billxmp$parlsize <- NA
      billxmp$rel_factionsize <- NA
      
      billxmp$parlsize[substr(billxmp$idDrucksache, 1, 2) == "14"] <- 139
      billxmp$factionsize[substr(billxmp$idDrucksache, 1, 2) == "14" & billxmp$partei == "CDU"] <- 69
      billxmp$factionsize[substr(billxmp$idDrucksache, 1, 2) == "14" & billxmp$partei == "SPD"]  <- 38
      billxmp$factionsize[substr(billxmp$idDrucksache, 1, 2) == "14" & billxmp$partei == "Gruene"] <- 17
      billxmp$factionsize[substr(billxmp$idDrucksache, 1, 2) == "14" & billxmp$partei == "FDP/DVP"] <- 15
      
      billxmp$parlsize[substr(billxmp$idDrucksache, 1, 2) == "15"] <- 138 
      billxmp$factionsize[substr(billxmp$idDrucksache, 1, 2) == "15" & billxmp$partei == "CDU"] <- 60
      billxmp$factionsize[substr(billxmp$idDrucksache, 1, 2) == "15" & billxmp$partei == "SPD"]  <- 35
      billxmp$factionsize[substr(billxmp$idDrucksache, 1, 2) == "15" & billxmp$partei == "Gruene"] <- 36
      billxmp$factionsize[substr(billxmp$idDrucksache, 1, 2) == "15" & billxmp$partei == "FDP/DVP"] <- 7
      
      billxmp$parlsize[substr(billxmp$idDrucksache, 1, 2) == "16"] <- 143
      billxmp$factionsize[substr(billxmp$idDrucksache, 1, 2) == "16" & billxmp$partei == "CDU"] <- 42
      billxmp$factionsize[substr(billxmp$idDrucksache, 1, 2) == "16" & billxmp$partei == "SPD"]  <- 19
      billxmp$factionsize[substr(billxmp$idDrucksache, 1, 2) == "16" & billxmp$partei == "Gruene"] <- 47 
      billxmp$factionsize[substr(billxmp$idDrucksache, 1, 2) == "16" & billxmp$partei == "FDP/DVP"] <- 12
      billxmp$factionsize[substr(billxmp$idDrucksache, 1, 2) == "16" & billxmp$partei == "AfD"] <- 23
      
      billxmp$factionsize[billxmp$partei == "fraktionslos"] <- 1
      billxmp$rel_factionsize <- billxmp$factionsize/billxmp$parlsize
      
      
    # ---------------------------------
    # 1.3.2 bill-specific variables
    # ---------------------------------  
      
      # bill length
      billxmp$bill_length <- NA
      
      for (i in 1: nrow(billxmp)) {
        
        billxmp$bill_length[i] <- 
          length(which(str_detect(billxmp$rawtext_left_orig[i][[1]],  
                                  paste(str_c("^Artikel ", 1:300,"$"), collapse = "|"))))
      }
     
      billxmp$bill_length[which(billxmp$bill_length == 0)] <- 1
      
      # opposition chair in committee
      # if laws have not been sent to a committee, code as 0 as well
      billxmp$oppchair_committee[which(is.na(billxmp$oppchair_committee))] <- 0
      
      # bill importance
      billxmp$prot_relpos_1[which(is.na(billxmp$prot_relpos_1))] <- 7/18
      billxmp$bill_imp <- (billxmp$prot_relpos_1)^(-1)
      
      # CAP field
      # recategorize all policy fields with less than 10 observations into "Other"
      billxmp$cap1_field[billxmp$cap1_field=="Defense" |
                         billxmp$cap1_field=="International Affairs and Foreign Aid" |
                         billxmp$cap1_field=="State and Local Government Administration" |
                         billxmp$cap1_field=="Public Lands and Water Management"] <- "Other"
      
      
      
    # ---------------------------------
    # 1.3.3 MP-specific variables
    # ---------------------------------  
  
      # opposition or coalition MP?
      wp14 <- which(substr(billxmp$idDrucksache, 1, 2) == "14" & (billxmp$partei=="CDU" | billxmp$partei=="FDP/DVP"))
      wp15 <- which(substr(billxmp$idDrucksache, 1, 2) == "15" & (billxmp$partei=="Gruene" | billxmp$partei=="SPD"))
      wp16 <- which(substr(billxmp$idDrucksache, 1, 2) == "16" & (billxmp$partei=="Gruene" | billxmp$partei=="CDU"))
      billxmp$opposition <- 1
      billxmp$opposition[c(wp14, wp15, wp16)] <- 0
      
      wp14 <- which(substr(billxmp$idDrucksache, 1, 2) == "14" & (billxmp$partei=="CDU" | billxmp$partei=="FDP/DVP"))
      
      # seniority (number of years in parliament at introduction of bill)
      billxmp$experience <- as.numeric(substr(billxmp$datestemp, 9, 12)) - billxmp$seniority
      
      # age
      billxmp$age <- as.numeric(str_sub(billxmp$datestemp, -4, -1)) - billxmp$year.born
      
      # age squared
      billxmp$age_squared <- billxmp$age^2
      
      # female
      table(billxmp$weiblich)
      
      # district magnitude
      billxmp$district_magnitude <- as.numeric(as.character(billxmp$district_magnitude))
  
      # party fixed effects
      table(billxmp$partei)
    
      # mandate
      table(billxmp$Mandat)
      
      # urban district
      billxmp$urban.district[which(billxmp$urban.district==2)] <- 1
      billxmp$urban.district <- as.factor(billxmp$urban.district)
      table(billxmp$urban.district)
  
      # committee membership
      billxmp$committee_member <- 0
      for (row in 1: nrow(billxmp)) {
        
        if (is.na(billxmp$idCommittee[row]))
          next
        
        if(str_detect(billxmp$committee.membership[row], billxmp$idCommittee[row]))
          billxmp$committee_member[row] <- 1
        
      }
      
      # committee position 
      billxmp$committee_position <- 0
      for (row in 1: nrow(billxmp)) {
        
        # bill not sent to committee
        if (is.na(billxmp$idCommittee[row])) {
          billxmp$committee_position[row] <- "no member"
          next
        }
        
        # membership
        billxmp$committee_position[row] <- 
          ifelse(str_detect(billxmp$committee.membership[row], billxmp$idCommittee[row]),
                 "member", 
                 "no member"
          )
        
        # (co-)chair
        if(str_detect(billxmp$committee.position[row], billxmp$idCommittee[row])) {
          
          if(str_detect(billxmp$committee.position[row], "Vorsitz") | str_detect(billxmp$committee.position[row], "Virsitz") |str_detect(billxmp$committee.position[row], "Vorstand"))
            billxmp$committee_position[row] <- "chair"
          
        }
        
      }
      
      billxmp$committee_position <- as.factor(billxmp$committee_position)
      billxmp$committee_position <- relevel(billxmp$committee_position, 
                                            ref="no member")
      
      # --------------------
      # highest position
      # dataset: billxmp
      # --------------------
      
        # executive leadership
        billxmp$position_highest <- billxmp$function_highest
        
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Minister"))] <- "executive leadership"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Staatssekretär"))] <- "executive leadership"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Staatssektretär"))] <- "executive leadership"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Staatssekräterin"))] <- "executive leadership"
        
        # faction leadership
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Fraktionsvorsitz"))] <- "faction leadership"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Geschäftsführer"))] <- "faction leadership"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Gechäftsführer"))] <- "faction leadership"
        
        # parliamentary leadership
        billxmp$position_highest[which(billxmp$committee_position == "chair")] <- "parliamentary leadership"
        
        # other and none
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Landtagspräsident"))] <- "0"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Präsident"))] <- "0"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Präsidium"))] <- "0"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Mitglied des Pr"))] <- "0"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Schrift"))] <- "0"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Schrit"))] <- "0"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Kontrollgremium"))] <- "0"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Artikel 10 GG"))] <- "0"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Vertreter"))] <- "0"
        billxmp$position_highest[which(str_detect(billxmp$function_highest,
                                                  "Oberrheinrat"))] <- "0"
        
        billxmp$position_highest <- as.factor(billxmp$position_highest)
        billxmp$position_highest <- relevel(billxmp$position_highest, re="0")
        
        ### in the model predicting legislative activity, being part of the parliamentary
        ### presidium should *not* lead to higher activity. However, in the model predicting 
        ### leadership positions at t+1, being part of the presidium counts as leadership
        ### both at t+1 and t. Hence, I need two different variables here with adjusted codings. 
        ### This is the variable for the dataset termXMP
        billxmp$position_highest_termXMP <- billxmp$position_highest
        
        billxmp$position_highest_termXMP[which(str_detect(billxmp$function_highest,
                                                  "Landtagspräsident"))] <- "parliamentary leadership"
        billxmp$position_highest_termXMP[which(str_detect(billxmp$function_highest,
                                                  "Präsident"))] <- "parliamentary leadership"
        billxmp$position_highest_termXMP[which(str_detect(billxmp$function_highest,
                                                  "Präsidium"))] <- "parliamentary leadership"
        billxmp$position_highest_termXMP[which(str_detect(billxmp$function_highest,
                                                  "Mitglied des Pr"))] <- "parliamentary leadership"
        
        
        
        
 
      # --------------------
      # highest position
      # dataset: mps
      # --------------------
        
        mps$position_highest_14_1 <- mps$function_14_1
        mps$position_highest_14_2 <- mps$function_14_2
        mps$position_highest_15 <- mps$function_15
        mps$position_highest_16 <- mps$function_16
        mps$position_highest_17 <- mps$function_17
        
        # executive leadership
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                  "Minister"))] <- "executive leadership"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Minister"))] <- "executive leadership"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                   "Minister"))] <- "executive leadership"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                   "Minister"))] <- "executive leadership"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Minister"))] <- "executive leadership"
        
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                   "Staatssekretär"))] <- "executive leadership"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Staatssekretär"))] <- "executive leadership"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                   "Staatssekretär"))] <- "executive leadership"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                   "Staatssekretär"))] <- "executive leadership"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Staatssekretär"))] <- "executive leadership"
        
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                   "Staatssektretär"))] <- "executive leadership"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Staatssektretär"))] <- "executive leadership"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                   "Staatssektretär"))] <- "executive leadership"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                   "Staatssektretär"))] <- "executive leadership"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Staatssektretär"))] <- "executive leadership"
        
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                   "Staatssekräterin"))] <- "executive leadership"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Staatssekräterin"))] <- "executive leadership"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                   "Staatssekräterin"))] <- "executive leadership"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                   "Staatssekräterin"))] <- "executive leadership"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Staatssekräterin"))] <- "executive leadership"
      
        
        # faction leadership
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                   "Fraktionsvorsitz"))] <- "faction leadership"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Fraktionsvorsitz"))] <- "faction leadership"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                 "Fraktionsvorsitz"))] <- "faction leadership"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                 "Fraktionsvorsitz"))] <- "faction leadership"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Fraktionsvorsitz"))] <- "faction leadership"
        
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                   "Geschäftsführer"))] <- "faction leadership"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Geschäftsführer"))] <- "faction leadership"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                 "Geschäftsführer"))] <- "faction leadership"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                 "Geschäftsführer"))] <- "faction leadership"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Geschäftsführer"))] <- "faction leadership"
        
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                   "Gechäftsführer"))] <- "faction leadership"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Gechäftsführer"))] <- "faction leadership"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                 "Gechäftsführer"))] <- "faction leadership"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                 "Gechäftsführer"))] <- "faction leadership"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Gechäftsführer"))] <- "faction leadership"
        
        
        # parliamentary leadership
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                   "Landtagspräsident"))] <- "parliamentary leadership"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Landtagspräsident"))] <- "parliamentary leadership"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                 "Landtagspräsident"))] <- "parliamentary leadership"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                 "Landtagspräsident"))] <- "parliamentary leadership"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Landtagspräsident"))] <- "parliamentary leadership"
        
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                   "Präsident"))] <- "parliamentary leadership"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Präsident"))] <- "parliamentary leadership"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                 "Präsident"))] <- "parliamentary leadership"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                 "Präsident"))] <- "parliamentary leadership"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Präsident"))] <- "parliamentary leadership"
        
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                   "Präsidium"))] <- "parliamentary leadership"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Präsidium"))] <- "parliamentary leadership"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                 "Präsidium"))] <- "parliamentary leadership"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                 "Präsidium"))] <- "parliamentary leadership"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Präsidium"))] <- "parliamentary leadership"
        
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                   "Mitglied des Pr"))] <- "parliamentary leadership"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Mitglied des Pr"))] <- "parliamentary leadership"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                 "Mitglied des Pr"))] <- "parliamentary leadership"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                 "Mitglied des Pr"))] <- "parliamentary leadership"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Mitglied des Pr"))] <- "parliamentary leadership"
        
        mps$position_highest_14_1[which(str_detect(mps$committee.position_14, 
                                                   "Vorsitz"))] <- "parliamentary leadership"
        mps$position_highest_14_2[which(str_detect(mps$committee.position_14, 
                                                   "Vorsitz"))] <- "parliamentary leadership"
        mps$position_highest_15[which(str_detect(mps$committee.position_15, 
                                                   "Vorsitz"))] <- "parliamentary leadership"
        mps$position_highest_16[which(str_detect(mps$committee.position_16, 
                                                 "Vorsitz"))] <- "parliamentary leadership"
        mps$position_highest_17[which(str_detect(mps$committee.position_17, 
                                                 "Vorsitz"))] <- "parliamentary leadership"
        
        mps$position_highest_14_1[which(str_detect(mps$committee.position_14, 
                                                   "Virsitz"))] <- "parliamentary leadership"
        mps$position_highest_14_2[which(str_detect(mps$committee.position_14, 
                                                   "Virsitz"))] <- "parliamentary leadership"
        
        mps$position_highest_14_1[which(str_detect(mps$committee.position_14, 
                                                   "Vorstand"))] <- "parliamentary leadership"
        mps$position_highest_14_2[which(str_detect(mps$committee.position_14, 
                                                   "Vorstand"))] <- "parliamentary leadership"
        mps$position_highest_15[which(str_detect(mps$committee.position_15, 
                                                   "Vorstand"))] <- "parliamentary leadership"
        mps$position_highest_16[which(str_detect(mps$committee.position_16, 
                                                 "Vorstand"))] <- "parliamentary leadership"
        mps$position_highest_17[which(str_detect(mps$committee.position_17, 
                                                 "Vorstand"))] <- "parliamentary leadership"
        
        # other and none
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                  "Schrift"))] <- "0"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Schrift"))] <- "0"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                   "Schrift"))] <- "0"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                   "Schrift"))] <- "0"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Schrift"))] <- "0"
        
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                  "Schrit"))] <- "0"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Schrit"))] <- "0"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                   "Schrit"))] <- "0"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                   "Schrit"))] <- "0"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Schrit"))] <- "0"
  
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                  "Kontrollgremium"))] <- "0"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Kontrollgremium"))] <- "0"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                   "Kontrollgremium"))] <- "0"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                   "Kontrollgremium"))] <- "0"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Kontrollgremium"))] <- "0"
        
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                  "Artikel 10 GG"))] <- "0"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Artikel 10 GG"))] <- "0"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                   "Artikel 10 GG"))] <- "0"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                   "Artikel 10 GG"))] <- "0"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Artikel 10 GG"))] <- "0"
        
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                  "Vertreter"))] <- "0"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Vertreter"))] <- "0"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                   "Vertreter"))] <- "0"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                   "Vertreter"))] <- "0"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Vertreter"))] <- "0"
        
        mps$position_highest_14_1[which(str_detect(mps$position_highest_14_1,
                                                  "Oberrheinrat"))] <- "0"
        mps$position_highest_14_2[which(str_detect(mps$position_highest_14_2,
                                                   "Oberrheinrat"))] <- "0"
        mps$position_highest_15[which(str_detect(mps$position_highest_15,
                                                   "Oberrheinrat"))] <- "0"
        mps$position_highest_16[which(str_detect(mps$position_highest_16,
                                                   "Oberrheinrat"))] <- "0"
        mps$position_highest_17[which(str_detect(mps$position_highest_17,
                                                 "Oberrheinrat"))] <- "0"
        
      # political ambition: candidatures 
      billxmp$candidatures <- 99
      billxmp$candidatures[which(billxmp$ambitionen == "Bundestagsliste")] <- "prog"
      billxmp$candidatures[which(billxmp$ambitionen == "EP-Liste")] <- "prog"
      billxmp$candidatures[which(billxmp$ambitionen == "Landtagsliste")] <- "static"
      billxmp$candidatures[which(billxmp$ambitionen == "Landtagsliste Saarland")] <- "static"
      billxmp$candidatures[which(billxmp$ambitionen == "keine Kandidatur")] <- "none"
      billxmp$candidatures <- as.factor(billxmp$candidatures)
      billxmp$candidatures <- relevel(billxmp$candidatures, ref="none")
      
      # political ambition: seniority * function
      billxmp$first_term <- NA
      for (row in 1: nrow(billxmp)) {
        
        if(substr(billxmp$idDrucksache[row], 1, 2) == "14")
          billxmp$first_term[row] <- 
            ifelse(!billxmp$seniority[row] < 2006,
                 1, 0)
        
        if(substr(billxmp$idDrucksache[row], 1, 2) == "15")
          billxmp$first_term[row] <- 
            ifelse(!billxmp$seniority[row] < 2011,
                   1, 0)
        
        if(substr(billxmp$idDrucksache[row], 1, 2) == "16")
          billxmp$first_term[row] <- 
            ifelse(!billxmp$seniority[row] < 2016,
                   1, 0)
          
      }
     
      # political ambition: Twitter or Facebook profile
      mps_smdata <- mps_smdata[,-8]
      mps_smdata$facebook_id_b[which(mps_smdata$facebook_id_b==0)] <- NA
      mps_smdata$twitter_id[which(mps_smdata$twitter_id==0)] <- NA
      
        # subset those that have professional Twitter or Facebook profile
        mps_smdata <- mps_smdata[-which(rowSums(is.na(mps_smdata[,7:8])) == 2),]
      
        billxmp$media <- 0
        for (i in 1:nrow(billxmp)) {
          if (is.element(billxmp$mp_name[i], mps_smdata$name))
            billxmp$media[i] <- 1
        }

      # indicator for position in next legislative term
      # leadership position vs. no leadership position vs. dropout
      billxmp$leadership_next <- NA
      for (row in 1: nrow(billxmp)) {
        
        id <- which(mps$mp_name == billxmp$mp_name[row])
        
        # ------------
        # WP 14
        # ------------
        if(substr(billxmp$idDrucksache[row], 1, 2) == "14") {
          
          # drop out of parliament 
          if (mps$position_highest_15[id] == "98")
            billxmp$leadership_next[row] <- "dropout"
          
          # no function or other
          if(mps$position_highest_15[id] == "0")
            billxmp$leadership_next[row] <- "no function"
          
          # executive leadership
          if(mps$position_highest_15[id] == "executive leadership")
            billxmp$leadership_next[row] <- "leadership"
          
          # faction leadership
          if(mps$position_highest_15[id] == "faction leadership")
            billxmp$leadership_next[row] <- "leadership"
          
          # parliamentary leadership
          if(mps$position_highest_15[id] == "parliamentary leadership")
            billxmp$leadership_next[row] <- "leadership"
          
        }
        
        # ------------
        # WP 15
        # ------------
        if(substr(billxmp$idDrucksache[row], 1, 2) == "15") {
          
          # drop out of parliament 
          if (mps$position_highest_16[id] == "98")
            billxmp$leadership_next[row] <- "dropout"
          
          # no function or other
          if(mps$position_highest_16[id] == "0")
            billxmp$leadership_next[row] <- "no function"
          
          # executive leadership
          if(mps$position_highest_16[id] == "executive leadership")
            billxmp$leadership_next[row] <- "leadership"
          
          # faction leadership
          if(mps$position_highest_16[id] == "faction leadership")
            billxmp$leadership_next[row] <- "leadership"
          
          # parliamentary leadership
          if(mps$position_highest_16[id] == "parliamentary leadership")
            billxmp$leadership_next[row] <- "leadership"
          
        }
        
        
        # ------------
        # WP 16
        # ------------
        if(substr(billxmp$idDrucksache[row], 1, 2) == "16") {
          
          # drop out of parliament 
          if (mps$position_highest_17[id] == "98")
            billxmp$leadership_next[row] <- "dropout"
          
          # no function or other
          if(mps$position_highest_17[id] == "0")
            billxmp$leadership_next[row] <- "no function"
          
          # executive leadership
          if(mps$position_highest_17[id] == "executive leadership")
            billxmp$leadership_next[row] <- "leadership"
          
          # faction leadership
          if(mps$position_highest_17[id] == "faction leadership")
            billxmp$leadership_next[row] <- "leadership"
          
          # parliamentary leadership
          if(mps$position_highest_17[id] == "parliamentary leadership")
            billxmp$leadership_next[row] <- "leadership"
          
        }
        
        
      }
      
      # Add vertical "promotions" to higher parliaments
     
      billxmp$leadership_next[which(substr(billxmp$idDrucksache, 1, 2) == "16" & billxmp$mp_name=="Jörg Meuthen")] <- "leadership"
      billxmp$leadership_next[which(substr(billxmp$idDrucksache, 1, 2) == "14" & billxmp$mp_name=="Ute Vogt")] <- "leadership"
      billxmp$leadership_next[which(substr(billxmp$idDrucksache, 1, 2) == "16" & billxmp$mp_name=="Lars Patrick Berg")] <- "higher parliament"
      billxmp$leadership_next[which(billxmp$mp_name=="Andreas Glück")] <- "higher parliament"
      billxmp$leadership_next[which(substr(billxmp$idDrucksache, 1, 2) != "14" & billxmp$mp_name=="Nils Schmid")] <- "higher parliament"
      billxmp$leadership_next[which(billxmp$mp_name=="Felix Schreiner")] <- "higher parliament"
      billxmp$leadership_next[which(billxmp$mp_name=="Michael Theurer")] <- "higher parliament"
      
      
      billxmp$leadership_next <- as.factor(billxmp$leadership_next)
      billxmp$leadership_next <- relevel(billxmp$leadership_next, ref="dropout")
      
      ### load final dataset including n_speeches and n_anfragen 
      load("billxmp_ejpr.RData")
      billxmp$newbie <- billxmp$first_term
      
      # term
      billxmp$term <- NA
      billxmp$term[which(substr(billxmp$idDrucksache, 1, 2) == "14")] <- 14
      billxmp$term[which(substr(billxmp$idDrucksache, 1, 2) == "15")] <- 15
      billxmp$term[which(substr(billxmp$idDrucksache, 1, 2) == "16")] <- 16
      
      # time until next election (number of days)
      billxmp$datedist_days <- NA
      billxmp$datedist_days[substr(billxmp$idDrucksache, 1, 2) == "14"] <- as.Date("2011-03-27") - as.Date(billxmp$datestemp[substr(billxmp$idDrucksache, 1, 2) == "14"], format="%d.%m.%Y")
      billxmp$datedist_days[substr(billxmp$idDrucksache, 1, 2) == "15"] <- as.Date("2016-03-13") - as.Date(as.character(billxmp$datestemp[substr(billxmp$idDrucksache, 1, 2) == "15"]), format="%d.%m.%Y")
      billxmp$datedist_days[substr(billxmp$idDrucksache, 1, 2) == "16"] <- as.Date("2021-03-14") - as.Date(as.character(billxmp$datestemp[substr(billxmp$idDrucksache, 1, 2) == "16"]), format="%d.%m.%Y") 
      
      # days passed after election 
      billxmp$days_passed <- NA
      billxmp$days_passed[substr(billxmp$idDrucksache, 1, 2) == "14"] <- as.Date(billxmp$datestemp[substr(billxmp$idDrucksache, 1, 2) == "14"], format="%d.%m.%Y") - as.Date("2006-03-26")
      billxmp$days_passed[substr(billxmp$idDrucksache, 1, 2) == "15"] <- as.Date(as.character(billxmp$datestemp[substr(billxmp$idDrucksache, 1, 2) == "15"]), format="%d.%m.%Y") - as.Date("2011-03-27")
      billxmp$days_passed[substr(billxmp$idDrucksache, 1, 2) == "16"] <- as.Date(as.character(billxmp$datestemp[substr(billxmp$idDrucksache, 1, 2) == "16"]), format="%d.%m.%Y") - as.Date("2016-03-13")
      
      # closeness of electoral race
      billxmp$vote_margin <- str_remove(billxmp$vote_margin, "_[:digit:]")
      billxmp$vote_margin <- as.numeric(str_replace(billxmp$vote_margin, 
                                         ",", "."))
      billxmp$Stimmenanteil <- str_remove(billxmp$Stimmenanteil, "_[:digit:]")
      billxmp$Stimmenanteil <- as.numeric(str_replace(billxmp$Stimmenanteil, 
                                                      ",", "."))
      billxmp$vote_distance <- NA
      for (mp in 1:nrow(billxmp)) {
        
        # construct closeness for Erstmandate
        if(billxmp$Mandat[mp]=="Erstmandat") {
          runnerup_id <- which(billxmp$Mandat=="Zweitmandat" & billxmp$Wahlkreisnummer==billxmp$Wahlkreisnummer[mp] & billxmp$term==billxmp$term[mp])
          if(length(runnerup_id)==0) {
            billxmp$vote_distance[mp] <- billxmp$vote_margin[mp]
            next
          }
          runnerup_id <- runnerup_id[which.max(billxmp$Stimmenanteil[runnerup_id])]
          billxmp$vote_distance[mp] <- abs(billxmp$Stimmenanteil[mp] - billxmp$Stimmenanteil[runnerup_id])
        }
        
        # construct closeness for Zweitmandate
        if(billxmp$Mandat[mp]=="Zweitmandat") {
          winner_id <- which(billxmp$Mandat=="Erstmandat" & billxmp$Wahlkreisnummer==billxmp$Wahlkreisnummer[mp] & billxmp$term==billxmp$term[mp])[1]
          billxmp$vote_distance[mp] <- abs(billxmp$Stimmenanteil[winner_id] - billxmp$Stimmenanteil[mp])
        }
       
      }
      
      # to understand the closeness measure
      billxmp[c(21273, 21398, 21404),
              c("mp_name", "Mandat", "term", "Wahlkreis", "vote_margin", "Stimmenanteil", "vote_distance")]
      
      
      
# --------------------------------------------
# 2. statistical modeling
# political ambition -> legislative review
# --------------------------------------------

      billxmp$Mandat <- as.factor(billxmp$Mandat)
      billxmp <- within(billxmp, 
                        Mandat <- relevel(Mandat, ref = 2)
                        )
      
      # define data
      billxmp_opp <- billxmp[which(billxmp$opposition==1),]
      billxmp_gov <- billxmp[which(billxmp$opposition==0),]
      dat_gov <- billxmp_gov[-which(billxmp_gov$candidatures=="99"),]
      dat_opp <- billxmp_opp[-which(billxmp_opp$candidatures=="99"),]

      # define controls
      controls <- ~. + weiblich + age + experience + newbie + committee_member + position_highest + Mandat + urban.district + log(district_magnitude) + vote_distance + 
        log(factionsize) + log(datedist) + oppchair_committee + log(n_bills_field) + log(bill_length) + cap1_field + (1|idDrucksache) + (1|mp_name)
      
      
      ### --------------------------------------------
      # 2.1 y = amend (binary)
      ### --------------------------------------------
        
        # opposition MPs
        m1_null <- glmer(amend ~ (1|idDrucksache), 
                         family="binomial", dat = dat_opp)
        summary(m1_null)
        icc(m1_null)
        
        m1 <- glmer(update(amend ~ candidatures, controls), 
                    family="binomial", nAGQ=0, data = dat_opp)
        summary(m1)  
        performance::icc(m1, by_group=T)
        r.squaredGLMM(m1)
        
        m2 <- glmer(update(amend ~ media, controls), 
                    family="binomial", nAGQ=0, data = dat_opp)
        summary(m2)  
        performance::icc(m2, by_group=T)
        r.squaredGLMM(m2)
        
       
      ### --------------------------------------
      # 2.2 y = n_articles
      ### --------------------------------------
      
      
        # opposition MPs
        m3_null <- glmer.nb(n_articles ~ (1|idDrucksache), 
                            data=dat_opp)
        summary(m3_null)
        icc(m3_null)
        
        m3 <- glmer.nb(update(n_articles ~ candidatures, controls), nAGQ=0, data = dat_opp)
        summary(m3)
        performance::icc(m3, by_group=T)
        r.squaredGLMM(m3)
        
        m4 <- glmer.nb(update(n_articles ~ media, controls), nAGQ=0, data = dat_opp)
        summary(m4)
        performance::icc(m4, by_group=T)
        r.squaredGLMM(m4)
        
        stargazer(m1, m2, m3, m4,
                  covariate.labels = c("Progressive Ambition", "Static Ambition", "Social Media Presence", "Female", "Age",
                                       "Seniority", "First Term", "Committee Member", "Party Group Leadership", "Parliamentary Leadership", "District Winner", 
                                       "Urban", "Log (District Magnitude)", "Closeness of Electoral Race", "Log (Party Group Size)", "Log (Date Distance)", "Opposition Chair Committee", "Log(Number of Bills in Policy Field)", "Log (Bill Length)"), 
                  star.cutoffs = c(.05, .01, .001)) 
         
        
    
        
# --------------------------------------------
# 3. statistical modeling
# legislative review -> legislative careers
# --------------------------------------------
    
      termxmp <- billxmp %>% 
        group_by(term, mp_name) %>% 
        summarize(n_amends = sum(amend),
                  n_articles = sum(n_articles),
                  worddist = sum(worddist), 
                  worddist_weighted = sum(worddist_weighted),
                  year.born = first(year.born),
                  age = first(age),
                  weiblich = first(weiblich),
                  partei = first(partei),
                  seniority = first(seniority),
                  experience = first(experience),
                  position_highest = first(position_highest_termXMP),
                  district_magnitude = first(district_magnitude),
                  urban.district = first(urban.district),
                  factionsize = first(factionsize),
                  opposition = first(opposition), 
                  leadership_next = first(leadership_next),
                  candidatures = first(candidatures), 
                  media = first(media), 
                  n_bills_committee = sum(committee_member),
                  n_speeches = first(n_speeches),
                  n_anfragen = first(n_anfragen),
                  newbie = first(first_term)
                  )
      termxmp$district_magnitude <- as.numeric(termxmp$district_magnitude)
      
      # collapse control variable for current position into binary indicator due to too few observations
      termxmp$position_highest <- as.character(termxmp$position_highest)
      termxmp$position_highest[termxmp$position_highest == "executive leadership" |
                               termxmp$position_highest == "faction leadership" |
                               termxmp$position_highest == "parliamentary leadership"] <- 1
      
      # construct binary dependent variable
      
        ### 0: dropout & no function, 1: leadership
        termxmp$leadership_next_binary <- termxmp$leadership_next
        termxmp$leadership_next_binary <- as.character(termxmp$leadership_next_binary)
        termxmp$leadership_next_binary[termxmp$leadership_next_binary == "dropout" |
                                         termxmp$leadership_next_binary == "no function"] <- 0
        termxmp$leadership_next_binary[termxmp$leadership_next_binary == "leadership"] <- 1
        termxmp$leadership_next_binary[termxmp$leadership_next_binary == "higher parliament"] <- 1
        termxmp$leadership_next_binary <- as.numeric(termxmp$leadership_next_binary)
      
        ### NA: dropout, 0: no function, 1: leadership
        termxmp$leadership_next_binary2 <- termxmp$leadership_next
        termxmp$leadership_next_binary2 <- as.character(termxmp$leadership_next_binary2)
        termxmp$leadership_next_binary2[termxmp$leadership_next_binary2 == "dropout"] <- NA
        termxmp$leadership_next_binary2[termxmp$leadership_next_binary2 == "no function"] <- 0
        termxmp$leadership_next_binary2[termxmp$leadership_next_binary2 == "leadership"] <- 1
        termxmp$leadership_next_binary2[termxmp$leadership_next_binary2 == "higher parliament"] <- 1
        termxmp$leadership_next_binary2 <- as.numeric(termxmp$leadership_next_binary2)
        
        
      # committee importance: number of documents a party has introduced to committee 
      billxmp$committee_recoded <- gsub(" ", "", billxmp$committee_recoded)
      termxmp$nbills_com_party <- NA
      termxmp$namends_com_party <- NA
      termxmp$ndocus_com_party <- NA
      termxmp$narticles_com_party <- 0
      for (row in 1:nrow(termxmp)) {
        
        # identify committee of MP in respective term 
        mp_committee <- tail(billxmp$committee_recoded[which((billxmp$mp_name == termxmp$mp_name[row]) & (billxmp$term == termxmp$term[row]) & billxmp$committee_member == 1)], n=1)
        
        # count number of bills that MP's party submitted to committee in term
        if (termxmp$partei[row] == "AfD" | termxmp$partei[row] == "fraktionslos")
          termxmp$nbills_com_party[row] <- length(unique(billxmp$idDrucksache[which((billxmp$committee_recoded == mp_committee) & (billxmp$term == termxmp$term[row]) & (billxmp$AfD == 1))]))
        if (termxmp$partei[row] == "CDU")
          termxmp$nbills_com_party[row] <- length(unique(billxmp$idDrucksache[which((billxmp$committee_recoded == mp_committee) & (billxmp$term == termxmp$term[row]) & (billxmp$CDU == 1))]))
        if (termxmp$partei[row] == "FDP/DVP")
          termxmp$nbills_com_party[row] <- length(unique(billxmp$idDrucksache[which((billxmp$committee_recoded == mp_committee) & (billxmp$term == termxmp$term[row]) & (billxmp$`FDP/DVP` == 1))]))
        if (termxmp$partei[row] == "Gruene")
          termxmp$nbills_com_party[row] <- length(unique(billxmp$idDrucksache[which((billxmp$committee_recoded == mp_committee) & (billxmp$term == termxmp$term[row]) & (billxmp$GRUENE == 1))]))
        if (termxmp$partei[row] == "SPD")
          termxmp$nbills_com_party[row] <- length(unique(billxmp$idDrucksache[which((billxmp$committee_recoded == mp_committee) & (billxmp$term == termxmp$term[row]) & (billxmp$SPD == 1))]))
        
        # count number of committee bills that MP's party has submitted amendments to
        termxmp$namends_com_party[row] <- length(unique(billxmp$idDrucksache[which((billxmp$committee_recoded == mp_committee) & (billxmp$term == termxmp$term[row]) & (billxmp$partei == termxmp$partei[row]) & billxmp$amend == 1)]))
        
        # build total number of documents 
        termxmp$ndocus_com_party[row] <- termxmp$nbills_com_party[row] + termxmp$namends_com_party[row]
        
        # sum number of submitted article changes that MP's party has submitted to MP's committee throughout term
        drucksachen <- unique(billxmp$idDrucksache[which((billxmp$committee_recoded == mp_committee) & (billxmp$term == termxmp$term[row]) & (billxmp$partei == termxmp$partei[row]) & billxmp$amend==1)])
        if (length(drucksachen) == 0) next
        
        if (termxmp$partei[row] == "AfD" | termxmp$partei[row] == "fraktionslos")
          termxmp$narticles_com_party[row] <- sum(lengths(amendmentprop$n_articles[which(is.element(amendmentprop$idDrucksacheLegis, drucksachen) & amendmentprop$AfD == 1)])) 
        if (termxmp$partei[row] == "CDU")
          termxmp$narticles_com_party[row] <- sum(lengths(amendmentprop$n_articles[which(is.element(amendmentprop$idDrucksacheLegis, drucksachen) & amendmentprop$CDU == 1)])) 
        if (termxmp$partei[row] == "FDP/DVP")
          termxmp$narticles_com_party[row] <- sum(lengths(amendmentprop$n_articles[which(is.element(amendmentprop$idDrucksacheLegis, drucksachen) & amendmentprop$`FDP/DVP` == 1)])) 
        if (termxmp$partei[row] == "Gruene")
          termxmp$narticles_com_party[row] <- sum(lengths(amendmentprop$n_articles[which(is.element(amendmentprop$idDrucksacheLegis, drucksachen) & amendmentprop$GRUENE == 1)])) 
        if (termxmp$partei[row] == "SPD")
          termxmp$narticles_com_party[row] <- sum(lengths(amendmentprop$n_articles[which(is.element(amendmentprop$idDrucksacheLegis, drucksachen) & amendmentprop$SPD == 1)])) 
                      
      }
     
      
      # committee importance: committee seniority 
      termxmp$committee_seniority_mean <- 0
      termxmp$committee_seniority_sum <- 0
      for (row in 1:nrow(termxmp)) {
        
        # identify committee of MP in respective term 
        mp_committee <- tail(billxmp$committee_recoded[which((billxmp$mp_name == termxmp$mp_name[row]) & (billxmp$term == termxmp$term[row]) & billxmp$committee_member == 1)], n=1)
        if(length(mp_committee) == 0) {
          termxmp$committee_seniority_mean[row] <- 0
          termxmp$committee_seniority_sum[row] <- 0
          next
        }
        
        # identify all committee members in respective term 
        mp_names <- unique(billxmp$mp_name[which((billxmp$committee_recoded) == mp_committee & (billxmp$term == termxmp$term[row]) & (billxmp$committee_member == 1))])
        
        if(termxmp$term[row] == 14) {
          termxmp$committee_seniority_mean[row] <- mean(2011 - mps$seniority[which(is.element(mps$mp_name, mp_names))])
          termxmp$committee_seniority_sum[row] <- sum(2011 - mps$seniority[which(is.element(mps$mp_name, mp_names))])
        }
        
        if(termxmp$term[row] == 15) {
          termxmp$committee_seniority_mean[row] <- mean(2016 - mps$seniority[which(is.element(mps$mp_name, mp_names))])
          termxmp$committee_seniority_sum[row] <- sum(2016 - mps$seniority[which(is.element(mps$mp_name, mp_names))])
        }
        
        if(termxmp$term[row] == 16) {
          termxmp$committee_seniority_mean[row] <- mean(2021 - mps$seniority[which(is.element(mps$mp_name, mp_names))])
          termxmp$committee_seniority_sum[row] <- sum(2021 - mps$seniority[which(is.element(mps$mp_name, mp_names))])
        }
        
      }
      
      # committee importance: share of national election manifesto devoted to policy area of committee
      cmp <- read.csv("MPDataset_MPDS2020b.csv")
      cmp <- cmp %>% 
        filter(countryname == "Germany", 
               edate == "27/09/2009" | edate == "22/09/2013" | edate == "24/09/2017") %>% 
        rowwise() %>% 
        mutate(finanzen_wirtschaft_share = sum(per401, per402, per403, per404, per405, per408, per409, per410, per412, per414, per415, per416), # CAP macroeconomy & banking, finance and domestic commerce
               schule_jugend_sport_share = sum(per506, per507), # CAP education
               wissenschaft_forschung_kunst_share = sum(per411), # CAP space, science, technology and communications 
               staendiger_ausschuss_share =  sum(per201, per202,  per203, per204, per301, per302, per303, per304, per305, per413, per411, per503, per605), # CAP civil rights, government operations, state and local government administration, space, science, technology and communications, law crime and family issues 
               soziales_share = sum(per701, per702, per704, per601, per602, per607, per608, per503, per504, per505, per705, per706), # CAP labor, employment, immigration & social welfare
               inneres_share = sum(per605), # CAP law, crime and family issues
               land_raum_verkehr_share = sum(per411, per504, per505, per501), # CAP transportation, community development and housing issues, public lands
               other_share = 0, 
               umwelt_klima_share = sum(per703, per416, per501), # CAP agriculture, environment, energy
               cap1_share = sum(per404, per408, per410, per415, per416), # macroeconomy 
               cap2_share = sum(per201, per202, per503), # civil rights
               cap3_share = sum(per504, per505), # healthcare
               cap4_share = per703, # agriculture
               cap5_share = sum(per701, per702, per704, per601, per602, per607, per608), # labor, employment, immigration
               cap6_share = sum(per506, per507), # education
               cap7_share = sum(per416, per501), # environment
               cap8_share = per501, # energy
               cap10_share = per411, # transportation
               cap12_share = per605, # law, crime and family issues
               cap13_share = sum(per503, per504, per505, per705, per706), # social welfare
               cap14_share = sum(per504, per505), # community development and housing issues
               cap15_share = sum(per401, per402, per403, per405, per409, per412, per414), # banking, finance, and domestic commerce
               cap16_share = sum(per103, per104, per105, per106), # defense
               cap17_share = per411, # space, science, technology and communications 
               cap18_share = sum(per406, per407), # foreign trade
               cap19_share = sum(per101, per102, per107, per108, per109, per110), # international affairs
               cap20_share = sum(per203, per204, per301, per302, per303, per304, per305, per413), # government operations
               cap21_share = per501, # public lands
               cap24_share = sum(per301, per302) # state and local government administration
        )
      cmp$partyabbrev <- recode(cmp$partyabbrev, "90/Greens" = "Gruene", 
                                "CDU/CSU" = "CDU", "FDP" = "FDP/DVP")
      
      cmp_overall <- cmp %>% 
        group_by(partyabbrev) %>% 
        summarise(finanzen_wirtschaft_mean = mean(finanzen_wirtschaft_share),
                  schule_jugend_sport_mean = mean(schule_jugend_sport_share),
                  wissenschaft_forschung_kunst_mean = mean(wissenschaft_forschung_kunst_share),
                  staendiger_ausschuss_mean = mean(staendiger_ausschuss_share),
                  soziales_mean = mean(soziales_share),
                  inneres_mean = mean(inneres_share),
                  land_raum_verkehr_mean = mean(land_raum_verkehr_share),
                  other_mean = mean(other_share),
                  umwelt_klima_mean = mean(umwelt_klima_share),
        )
      cmp_overall <- cmp_overall[-1,c("partyabbrev", "staendiger_ausschuss_mean", 
                                    "finanzen_wirtschaft_mean", "umwelt_klima_mean", 
                                    "soziales_mean", "inneres_mean", 
                                    "schule_jugend_sport_mean", "wissenschaft_forschung_kunst_mean", 
                                    "land_raum_verkehr_mean", "other_mean")]
      
      termxmp$committee_cmpshare <- NA
      cmp$term <- cmp$edate 
      cmp$term  <- recode(cmp$term, "27/09/2009" = 14, 
             "22/09/2013" = 15, 
             "24/09/2017" = 16
             )
      
      for (row in 1:nrow(termxmp)) {
        
        # identify committee of MP in respective term 
        mp_committee <- tail(billxmp$committee_recoded[which((billxmp$mp_name == termxmp$mp_name[row]) & (billxmp$term == termxmp$term[row]) & billxmp$committee_member == 1)], n=1)
        if(length(mp_committee) == 0) {
          termxmp$committee_cmpshare[row] <- 0
          next
        }
        
        # extract share of national election manifesto devoted to policy area of committee in specific term
        if(mp_committee == "FinanzenundWirtschaft")
          termxmp$committee_cmpshare[row] <- cmp$finanzen_wirtschaft_share[which(cmp$partyabbrev == termxmp$partei[row] & cmp$term == termxmp$term[row])]
        if(mp_committee == "Schule,Jugend,Sport")
          termxmp$committee_cmpshare[row] <- cmp$schule_jugend_sport_share[which(cmp$partyabbrev == termxmp$partei[row] & cmp$term == termxmp$term[row])]
        if(mp_committee == "Wissenschaft,Forschung,Kunst")
          termxmp$committee_cmpshare[row] <- cmp$wissenschaft_forschung_kunst_share[which(cmp$partyabbrev == termxmp$partei[row] & cmp$term == termxmp$term[row])]
        if(mp_committee == "StändigerAusschuss")
          termxmp$committee_cmpshare[row] <- cmp$staendiger_ausschuss_share[which(cmp$partyabbrev == termxmp$partei[row] & cmp$term == termxmp$term[row])]
        if(mp_committee == "Soziales")
          termxmp$committee_cmpshare[row] <- cmp$soziales_share[which(cmp$partyabbrev == termxmp$partei[row] & cmp$term == termxmp$term[row])]
        if(mp_committee == "Inneres")
          termxmp$committee_cmpshare[row] <- cmp$inneres_share[which(cmp$partyabbrev == termxmp$partei[row] & cmp$term == termxmp$term[row])]
        if(mp_committee == "LändlicherRaum,Verkehr")
          termxmp$committee_cmpshare[row] <- cmp$land_raum_verkehr_share[which(cmp$partyabbrev == termxmp$partei[row] & cmp$term == termxmp$term[row])]
        if(mp_committee == "Other")
          termxmp$committee_cmpshare[row] <- cmp$other_share[which(cmp$partyabbrev == termxmp$partei[row] & cmp$term == termxmp$term[row])]
        if(mp_committee == "UmweltundKlima")
          termxmp$committee_cmpshare[row] <- cmp$umwelt_klima_share[which(cmp$partyabbrev == termxmp$partei[row] & cmp$term == termxmp$term[row])]
        
      }
      
      # split into opposition and coalition data
      data_opposition <- termxmp[termxmp$opposition==1,]
      data_gov <- termxmp[termxmp$opposition==0,]
      
      
      ### --------------------------------------------
      # 3.1 x = amend 
      ### --------------------------------------------
        
        # opposition MPs (binary dependent variable)
        m_5a <- glm(leadership_next_binary ~ n_amends + n_speeches + n_anfragen + weiblich + age + experience +
                            position_highest + nbills_com_party + urban.district + log(district_magnitude) + partei,
                          data = data_opposition, family="binomial")  
        summary(m_5a)
        sqrt(diag(vcovCL(m_5a, cluster = ~mp_name)))
        r.squaredGLMM(m_5a)
      
        m_5b <- glm(leadership_next_binary ~ n_amends + n_speeches + n_anfragen + weiblich + age + experience +
                      position_highest + committee_cmpshare + urban.district + log(district_magnitude) + partei,
                    data = data_opposition, family="binomial")
        summary(m_5b)
        sqrt(diag(vcovCL(m_5b, cluster = ~mp_name)))
        r.squaredGLMM(m_5b)
        
        
      ### --------------------------------------------
      # 3.2 x = n_articles 
      ### --------------------------------------------
      
        # opposition MPs (binary dependent variable)
        m_6a <- glm(leadership_next_binary ~ n_articles + n_speeches + n_anfragen + weiblich + age + experience +
                            position_highest + nbills_com_party + urban.district + log(district_magnitude) + partei,
                          data = data_opposition, family="binomial")
        summary(m_6a)
        sqrt(diag(vcovCL(m_6a, cluster = ~mp_name)))
        r.squaredGLMM(m_6a)
       
        m_6b <- glm(leadership_next_binary ~ n_articles + n_speeches + n_anfragen + weiblich + age + experience +
                      position_highest + committee_cmpshare + urban.district + log(district_magnitude) + partei,
                    data = data_opposition, family="binomial")
        summary(m_6b)
        sqrt(diag(vcovCL(m_6b, cluster = ~mp_name)))
        r.squaredGLMM(m_6b)
        
         
        stargazer(m_5a, m_5b, m_6a, m_6b,
                  covariate.labels = c("Number of Amended Bills", "Number of Article Changes", "Number of Speeches", "Number of Parliamentary Questions", "Female", "Age", "Seniority", 
                                       "Leadership Position", "Committee Importance", "Urban District", "District Size"), 
                  se=list(sqrt(diag(vcovCL(m_5a, cluster = ~mp_name))), 
                          sqrt(diag(vcovCL(m_5b, cluster = ~mp_name))),
                          sqrt(diag(vcovCL(m_6a, cluster = ~mp_name))),
                          sqrt(diag(vcovCL(m_6b, cluster = ~mp_name)))),
                  star.cutoffs = c(.05, .01, .001)) 
        
       
        
# --------------------------------------------
# 4. effect visualizations
# --------------------------------------------
        ### ----------------------------
        # models 1 and 3
        # first differences for 
        # progressive vs. static
        # progressive vs. no ambition
        # static vs. no ambition 
        ### -----------------------------
        
        # ---------------------
        # model 1
        # ---------------------
        
        # get coefficients 
        coefs <- unlist(lapply(coef(m1)[[1]], mean)) 
        # coefs <- coef(m2_zelig)
        vhat <- vcov(m1)   
        draws <- mvrnorm(10000, coefs, vhat)
        
        # set covariate values, rest to means, compute first differences
        fds_prob <- as.data.frame(matrix(NA, nrow=10000, ncol=6))
        colnames(fds_prob) <- c("prog", "stat", "none", "prog_stat", "prog_none", "stat_none")
        
        prog <- c(1, # intercept
                  1, # candidaturesprog 
                  0, # candidaturesstatic
                  1, # weiblich
                  mean(dat_opp$age), 
                  mean(dat_opp$experience), 
                  0, # newbie
                  1, # committee_member
                  0, # position_highestfaction leadership 
                  0, # position_highestparliamentary leadership
                  1, # Erstmandat
                  1, # urban.district1
                  mean(log(dat_opp$district_magnitude)),
                  mean(dat_opp$vote_distance),
                  mean(log(dat_opp$factionsize)),
                  mean(log(dat_opp$datedist)),
                  1, # oppchair_committee
                  mean(log(dat_opp$n_bills_field)),
                  mean(log(dat_opp$bill_length)),
                  rep(0,3), 1, rep(0,11)
        )
        
        stat <- c(1, # intercept
                  0, # candidaturesprog 
                  1, # candidaturesstatic
                  1, # weiblich
                  mean(dat_opp$age), 
                  mean(dat_opp$experience),
                  0, # newbie
                  1, # committee_member
                  0, # position_highestfaction leadership 
                  0, # position_highestparliamentary leadership
                  1, # Erstmandat
                  1, # urban.district1
                  mean(log(dat_opp$district_magnitude)),
                  mean(dat_opp$vote_distance),
                  mean(log(dat_opp$factionsize)),
                  mean(log(dat_opp$datedist)),
                  1, # oppchair_committee
                  mean(log(dat_opp$n_bills_field)),
                  mean(log(dat_opp$bill_length)),
                  rep(0,3), 1, rep(0,11)
        )
        
        none <- c(1, # intercept
                  0, # candidaturesprog 
                  0, # candidaturesstatic
                  1, # weiblich
                  mean(dat_opp$age), 
                  mean(dat_opp$experience), 
                  0, # newbie
                  1, # committee_member
                  0, # position_highestfaction leadership 
                  0, # position_highestparliamentary leadership
                  1, # Erstmandat
                  1, # urban.district1
                  mean(log(dat_opp$district_magnitude)),
                  mean(dat_opp$vote_distance),
                  mean(log(dat_opp$factionsize)),
                  mean(log(dat_opp$datedist)),
                  1, # oppchair_committee
                  mean(log(dat_opp$n_bills_field)),
                  mean(log(dat_opp$bill_length)),
                  rep(0,3), 1, rep(0,11) 
        )
        
        # compute difference in predicted probabilities
        fds_prob[,"prog"] <- 1/(1+exp(-(draws %*% as.matrix(prog))))
        fds_prob[,"stat"] <- 1/(1+exp(-(draws %*% as.matrix(stat))))  
        fds_prob[,"none"] <- 1/(1+exp(-(draws %*% as.matrix(none))))
        fds_prob[,"prog_stat"] <- fds_prob[,"prog"] - fds_prob[,"stat"] 
        fds_prob[,"prog_none"] <- fds_prob[,"prog"] - fds_prob[,"none"] 
        fds_prob[,"stat_none"] <- fds_prob[,"stat"] - fds_prob[,"none"] 
        
        # compute uncertainties in terms of 83% (5/6) and 95% confidence intervals
        ci_prog80 <- quantile(fds_prob[,"prog"], c((1/6)/2, 1-(1/6)/2))
        ci_stat80 <- quantile(fds_prob[,"stat"], c((1/6)/2, 1-(1/6)/2))
        ci_none80 <- quantile(fds_prob[,"none"], c((1/6)/2, 1-(1/6)/2))
        ci_prog95 <- quantile(fds_prob[,"prog"], c(.025, .975))
        ci_stat95 <- quantile(fds_prob[,"stat"], c(.025, .975))
        ci_none95 <- quantile(fds_prob[,"none"], c(.025, .975))
        ci_prog_stat80 <- quantile(fds_prob[,"prog_stat"], c((1/6)/2, 1-(1/6)/2))
        ci_prog_none80 <- quantile(fds_prob[,"prog_none"], c((1/6)/2, 1-(1/6)/2))
        ci_stat_none80 <- quantile(fds_prob[,"stat_none"], c((1/6)/2, 1-(1/6)/2))
        ci_prog_stat95 <- quantile(fds_prob[,"prog_stat"], c(.025, .975))
        ci_prog_none95 <- quantile(fds_prob[,"prog_none"], c(.025, .975))
        ci_stat_none95 <- quantile(fds_prob[,"stat_none"], c(.025, .975))
        
        
        # visualize predicted probabilities and first differences
        lab.size <- 2.5
        axis.size <- 2.5
        
        ### left plot
        par(mfrow = c(1, 2), mar = c(7, 11, 2, 0), mgp=c(6,1,0), las = 1)
        plot(c(1:3), c(mean(fds_prob[,"prog"]), mean(fds_prob[,"stat"]), 
                       mean(fds_prob[,"none"])), 
             pch = c(rep(19,3)),
             ylab = "Predicted Probability of Bill Amendment", xlab = "",
             xlim = c(1, 3), 
             ylim = c(-0.05, 0.25),
             cex = 3,
             cex.lab = lab.size,
             axes = FALSE
        )
        
        
        axis(
          1,
          at = c(1:3),
          label = c("Progressive", "Static", "Discrete"),
          col = "white",
          cex.axis = axis.size
        )
        
        axis(2, cex.axis = axis.size)
        
        abline(h = 0,
               lty = 2) # Defines line type
        
        segments(c(1:3),
                 c(ci_prog80[1], ci_stat80[1], ci_none80[1]),
                 c(1:3),
                 c(ci_prog80[2], ci_stat80[2], ci_none80[2]),
                 col = "black",
                 lwd=8) 
        
        segments(c(1:3),
                 c(ci_prog95[1], ci_stat95[1], ci_none95[1]),
                 c(1:3),
                 c(ci_prog95[2], ci_stat95[2], ci_none95[2]),
                 col = "black", 
                 lwd = 2) 
        
        text(2, 0.24, labels = "Predicted Probabilities", cex=2.5)
        
        
        ### right plot
        par(mar = c(7, 0, 2, 11))
        plot(c(1:3), c(mean(fds_prob[,"prog_stat"]), mean(fds_prob[,"prog_none"]), 
                       mean(fds_prob[,"stat_none"])),
             pch = rep(17,3),
             cex = 3, 
             cex.lab = lab.size,
             ann = F,
             xlim = c(1, 3),
             # Range of x-axis
             ylim = c(-0.05, 0.25),
             # Range of y-axis
             axes = FALSE,
             # Suppresses both x and y axes
             col = "maroon3"
        )
        rect(-2,-2, 3.1, 2,
             border = F,
             col = "grey92")
        par(xpd=T)
        axis(1,
             at = c(1:3),
             labels = c("Progressive vs.\n Static", "Progressive vs.\n Discrete", "Static vs.\n Discrete"),
             col.axis = "Maroon3",
             col = "white",
             cex.axis = axis.size,
             padj=1.1
        )
        par(xpd=F)
        axis(4, cex.axis = axis.size)
        abline(h = 0,
               lty = 2) # Defines line type
        segments(c(1:3),
                 c(ci_prog_stat80[1], ci_prog_none80[1], ci_stat_none80[1]),
                 c(1:3),
                 c(ci_prog_stat80[2], ci_prog_none80[2], ci_stat_none80[2]) ,
                 col = "Maroon3",
                 lwd=8) 
        
        segments(c(1:3),
                 c(ci_prog_stat95[1], ci_prog_none95[1], ci_stat_none95[1]),
                 c(1:3),
                 c(ci_prog_stat95[2], ci_prog_none95[2], ci_stat_none95[2]) ,
                 col = "Maroon3",
                 lwd = 2,) 
        points(c(1:3),
               c(mean(fds_prob[,"prog_stat"]), mean(fds_prob[,"prog_none"]), 
                 mean(fds_prob[,"stat_none"])),
               pch = rep(17,3), # Type of plotting symbol
               col = "Maroon3", 
               cex = 3)
        
        
        text(2, 0.24, labels = "Difference in\nPredicted Probabilities", cex=2.5)
        
        # ---------------------
        # model 3
        # ---------------------
        
        # get coefficients 
        coefs <- unlist(lapply(coef(m3)[[1]], mean))
        vhat <- vcov(m3)   
        draws <- mvrnorm(10000, coefs, vhat)
        
        # set covariate values, rest to means, compute first differences
        fds_count <- as.data.frame(matrix(NA, nrow=10000, ncol=6))
        colnames(fds_count) <- c("prog", "stat", "none", "prog_stat", "prog_none", "stat_none")
        
        prog <- c(1, # intercept
                  1, # candidaturesprog 
                  0, # candidaturesstatic
                  1, # weiblich
                  mean(dat_opp$age), 
                  mean(dat_opp$experience), 
                  0, # newbie
                  1, # committee_member
                  0, # position_highestfaction leadership 
                  0, # position_highestparliamentary leadership
                  1, # Erstmandat
                  1, # urban.district1
                  mean(log(dat_opp$district_magnitude)),
                  mean(dat_opp$vote_distance),
                  mean(log(dat_opp$factionsize)),
                  mean(log(dat_opp$datedist)),
                  1, # oppchair_committee
                  mean(log(dat_opp$n_bills_field)),
                  mean(log(dat_opp$bill_length)),
                  rep(0,3), 1, rep(0,11)
        )
        
        stat <- c(1, # intercept
                  0, # candidaturesprog 
                  1, # candidaturesstatic
                  1, # weiblich
                  mean(dat_opp$age), 
                  mean(dat_opp$experience),
                  0, # newbie
                  1, # committee_member
                  0, # position_highestfaction leadership 
                  0, # position_highestparliamentary leadership
                  1, # Erstmandat
                  1, # urban.district1
                  mean(log(dat_opp$district_magnitude)),
                  mean(dat_opp$vote_distance),
                  mean(log(dat_opp$factionsize)),
                  mean(log(dat_opp$datedist)),
                  1, # oppchair_committee
                  mean(log(dat_opp$n_bills_field)),
                  mean(log(dat_opp$bill_length)),
                  rep(0,3), 1, rep(0,11)
        )
        
        none <- c(1, # intercept
                  0, # candidaturesprog 
                  0, # candidaturesstatic
                  1, # weiblich
                  mean(dat_opp$age), 
                  mean(dat_opp$experience), 
                  0, # newbie
                  1, # committee_member
                  0, # position_highestfaction leadership 
                  0, # position_highestparliamentary leadership
                  1, # Erstmandat
                  1, # urban.district1
                  mean(log(dat_opp$district_magnitude)),
                  mean(dat_opp$vote_distance),
                  mean(log(dat_opp$factionsize)),
                  mean(log(dat_opp$datedist)),
                  1, # oppchair_committee
                  mean(log(dat_opp$n_bills_field)),
                  mean(log(dat_opp$bill_length)),
                  rep(0,3), 1, rep(0,11) 
        )
        
        # compute difference in predicted probabilities
        fds_count[,"prog"] <- exp(draws %*% as.matrix(prog))
        fds_count[,"stat"] <- exp(draws %*% as.matrix(stat))
        fds_count[,"none"] <- exp(draws %*% as.matrix(none))
        fds_count[,"prog_stat"] <- fds_count[,"prog"] - fds_count[,"stat"] 
        fds_count[,"prog_none"] <- fds_count[,"prog"] - fds_count[,"none"] 
        fds_count[,"stat_none"] <- fds_count[,"stat"] - fds_count[,"none"] 
        
        # compute uncertainties in terms of 95% confidence intervals
        ci_prog80 <- quantile(fds_count[,"prog"], c((1/6)/2, 1-(1/6)/2))
        ci_stat80 <- quantile(fds_count[,"stat"], c((1/6)/2, 1-(1/6)/2))
        ci_none80 <- quantile(fds_count[,"none"], c((1/6)/2, 1-(1/6)/2))
        ci_prog95 <- quantile(fds_count[,"prog"], c(.025, .975))
        ci_stat95 <- quantile(fds_count[,"stat"], c(.025, .975))
        ci_none95 <- quantile(fds_count[,"none"], c(.025, .975))
        ci_prog_stat80 <- quantile(fds_count[,"prog_stat"], c((1/6)/2, 1-(1/6)/2))
        ci_prog_none80 <- quantile(fds_count[,"prog_none"], c((1/6)/2, 1-(1/6)/2))
        ci_stat_none80 <- quantile(fds_count[,"stat_none"], c((1/6)/2, 1-(1/6)/2))
        ci_prog_stat95 <- quantile(fds_count[,"prog_stat"], c(.025, .975))
        ci_prog_none95 <- quantile(fds_count[,"prog_none"], c(.025, .975))
        ci_stat_none95 <- quantile(fds_count[,"stat_none"], c(.025, .975))
        
        
        # visualize expected values and first differences
        lab.size <- 2.5
        axis.size <- 2.5
        
        ### left plot
        par(mfrow = c(1, 2), mar = c(7, 11, 2, 0), mgp=c(6,1,0), las = 1)
        plot(c(1:3), c(mean(fds_count[,"prog"]), mean(fds_count[,"stat"]), 
                       mean(fds_count[,"none"])), 
             pch = c(rep(19,3)),
             ylab = "Number of Altered Articles", xlab = "",
             xlim = c(1, 3), 
             ylim = c(-0.1, 0.35),
             cex = 3,
             cex.lab = lab.size,
             axes = FALSE
        )
        
        axis(
          1,
          at = c(1:3),
          label = c("Progressive", "Static", "Discrete"),
          col = "white",
          cex.axis = axis.size
        )
        
        axis(2, cex.axis = axis.size)
        
        abline(h = 0,
               lty = 2) # Defines line type
        
        segments(c(1:3),
                 c(ci_prog80[1], ci_stat80[1], ci_none80[1]),
                 c(1:3),
                 c(ci_prog80[2], ci_stat80[2], ci_none80[2]),
                 col = "black",
                 lwd=8) 
        
        segments(c(1:3),
                 c(ci_prog95[1], ci_stat95[1], ci_none95[1]),
                 c(1:3),
                 c(ci_prog95[2], ci_stat95[2], ci_none95[2]),
                 col = "black", 
                 lwd=2) 
        
        text(2, 0.34, labels = "Expected Values", cex=2.5)
        
        
        ### right plot
        par(mar = c(7, 0, 2, 11))
        plot(c(1:3), c(mean(fds_count[,"prog_stat"]), mean(fds_count[,"prog_none"]), 
                       mean(fds_count[,"stat_none"])),
             pch = rep(17,3),
             ann = F,
             xlim = c(1, 3),
             # Range of x-axis
             ylim = c(-0.1, 0.35),
             # Range of y-axis
             axes = FALSE,
             cex = 3,
             cex.lab = lab.size,
             # Suppresses both x and y axes
             col = "maroon3"
        )
        rect(-2,-2, 3.1, 2,
             border = F,
             col = "grey92")
        axis(1,
             at = c(1:3),
             labels = c("Progressive vs.\n Static", "Progressive vs.\n Discrete", "Static vs.\n Discrete"),
             col.axis = "Maroon3",
             col = "white",
             cex.axis = axis.size,
             padj=1.1
        )
        axis(4, cex.axis = axis.size)
        abline(h = 0,
               lty = 2) # Defines line type
        points(c(1:3),
               c(mean(fds_count[,"prog_stat"]), mean(fds_count[,"prog_none"]), 
                 mean(fds_count[,"stat_none"])),
               pch = rep(17,3), # Type of plotting symbol
               cex=3,
               col = "Maroon3")
        
        segments(c(1:3),
                 c(ci_prog_stat80[1], ci_prog_none80[1], ci_stat_none80[1]),
                 c(1:3),
                 c(ci_prog_stat80[2], ci_prog_none80[2], ci_stat_none80[2]) ,
                 col = "Maroon3",
                 lwd=8) 
        
        segments(c(1:3),
                 c(ci_prog_stat95[1], ci_prog_none95[1], ci_stat_none95[1]),
                 c(1:3),
                 c(ci_prog_stat95[2], ci_prog_none95[2], ci_stat_none95[2]) ,
                 col = "Maroon3", 
                 lwd=2) 
        
        text(2, 0.34, labels = "First Differences", cex=2.5)
        
        
       
        ### -----------------
        # model m_6a
        ### -----------------
       
        library(plyr) # code above doesn't run once plyr is loaded 
         
        # get coefficients 
        coefs <- coef(m_6a)
        vhat <- vcovCL(m_6a, cluster = ~mp_name)
        draws <- mvrnorm(1000, coefs, vhat)
        
        # set covariate values, rest to means, vary over n_articles
        mus <- matrix(NA, 
                      nrow=1000, 
                      ncol=length(table(data_opposition$n_articles)))
        
        mus_list <- list()
        
        for (case in 1: nrow(data_opposition)) {
          
          for (i in 1:length(table(data_opposition$n_articles))) {
            
            setting <- c(1, sort(unique(data_opposition$n_articles))[i],
                         data_opposition$n_speeches[case], 
                         data_opposition$n_anfragen[case],
                         data_opposition$weiblich[case], 
                         data_opposition$age[case],
                         data_opposition$experience[case], 
                         as.numeric(data_opposition$position_highest[case]), 
                         data_opposition$nbills_com_party[case],
                         data_opposition$urban.district[case],  
                         log(data_opposition$district_magnitude[case]),
                         0, 0, 0, 0, 0)
            
            mus[,i] <- 1/(1+exp(-(draws %*% as.matrix(setting))))
            
          }
          
          mus_list[[case]] <- as.data.frame(mus)
          
        }
        
        mus <- rbind.fill(mus_list)
        
        # visualize expected values
        quantiles_025 <- quantiles_975 <- rep(NA, length(table(data_opposition$n_articles)))
        for (i in 1:ncol(mus)) {
          quantiles_025[i] <- quantile(mus[,i], probs=0.025)
          quantiles_975[i] <- quantile(mus[,i], probs=0.975)
        }
        
        sds <- rep(NA, length(table(data_opposition$n_articles)))
        for (i in 1:ncol(mus)) {
          sds[i] <- sd(mus[,i])
        }
        
        
        exp_values <- as.data.frame(cbind(colMeans(mus),
                                          sort(unique(data_opposition$n_articles)),
                                          quantiles_025, quantiles_975, sds
        )
        )
        colnames(exp_values) <- c("probs", "n_articles", "quantiles_025", "quantiles_975", "sds")
        exp_values$probs <- as.numeric(as.character(exp_values$probs))
        exp_values$n_articles <- as.numeric(as.character(exp_values$n_articles))
        exp_values$quantiles_025 <- as.numeric(as.character(exp_values$quantiles_025)) 
        exp_values$quantiles_975 <- as.numeric(as.character(exp_values$quantiles_975)) 
        exp_values$sds <- as.numeric(as.character(exp_values$sds)) 
        
        
        # only plot until n_articles=33
        exp_values2 <- exp_values[1:19,]
        
        ggplot(exp_values2, aes(x=n_articles, y=probs)) +
          geom_ribbon(aes(x=n_articles,
                          ymin=probs-sds/2, 
                          ymax=probs+sds/2), alpha=0.1) +
          geom_ribbon(aes(x=n_articles,
                          ymin=probs-sds/2.5, 
                          ymax=probs+sds/2.5), alpha=0.15) +
          geom_ribbon(aes(x=n_articles,
                          ymin=probs-sds/3, 
                          ymax=probs+sds/3), alpha=0.2) +
          geom_ribbon(aes(x=n_articles,
                          ymin=probs-sds/4, 
                          ymax=probs+sds/4), alpha=0.3) +
          geom_ribbon(aes(x=n_articles,
                          ymin=probs-sds/5, 
                          ymax=probs+sds/5), alpha=0.4) +
          geom_ribbon(aes(x=n_articles,
                          ymin=probs-sds/6, 
                          ymax=probs+sds/6), alpha=0.5) +
          geom_ribbon(aes(x=n_articles,
                          ymin=probs-sds/7, 
                          ymax=probs+sds/7), alpha=0.6) +
          geom_ribbon(aes(x=n_articles,
                          ymin=probs-sds/8, 
                          ymax=probs+sds/8), alpha=0.7) +
          geom_ribbon(aes(x=n_articles,
                          ymin=probs-sds/9, 
                          ymax=probs+sds/9), alpha=0.8) +
          geom_ribbon(aes(x=n_articles,
                          ymin=probs-sds/10, 
                          ymax=probs+sds/10), alpha=0.9) +
          geom_line(aes(x=n_articles, y=probs), lwd=1.3, col = "black") +
          scale_x_continuous(breaks=seq(0, 20, 4), limits=c(0,20),) +
          scale_y_continuous(breaks=seq(0, 0.7, 0.1)) +
          scale_colour_manual(values = c("darkgrey", "black"), name = "", labels = c("", "")) +
          scale_fill_manual(values = c("darkgrey", "black"), name = "", labels = c("", "")) + 
          guides(color=F, fill=F, fill = guide_legend(reverse = TRUE), color = guide_legend(reverse = TRUE)) +
          theme_bw() +
          theme(panel.grid.minor = element_blank(), 
                axis.line = element_line(colour = "black"),   
                axis.text=element_text(size=25), axis.title=element_text(size=25),
                legend.text=element_text(size=25), legend.title=element_text(size=25)) +
          xlab("Number of Amended Articles") + ylab("Predicted Probability of Leadership Position")
        
        
  #####################################
  ### summary statistics for theory
        
        wp14 <- which(amendmentprop$CDU ==0 | amendmentprop$`FDP/DVP` ==0)
        wp15 <- which(amendmentprop$Gruene ==0 | amendmentprop$SPD ==0)
        wp16 <- which(amendmentprop$Gruene ==0 | amendmentprop$CDU ==0)
        opp <- c(wp14, wp15, wp16)
        
        
        amendmentprop <- amendmentprop[opp,]
        length(which(amendmentprop$GRUENE==1))
        
        library(ngram)
        amendmentprop$amend_length <- unlist(lapply(amendmentprop$rawtext_left, wordcount))
        
        
  ###################################
  ### summary statistics of data
  # billXMP 
  summary(dat_opp$amend)
  summary(dat_opp$n_articles)      
  table(dat_opp$ambitionen)
  table(dat_opp$media)      
  #termXMP
  summary(data_opposition$leadership_next_binary)
  summary(data_opposition$n_amends)
  summary(data_opposition$n_articles)
  
  
  ###################################
  ### how many MPs on average sign an amendment proposal?
  n_authors <- rep(0, nrow(amendmentprop))
  for (row in 1:nrow(amendmentprop)) 
    n_authors[row] <- length(which(str_detect(amendmentprop$members[row], mps$last.name)))
  mean(n_authors)
  
  ###################################
  ### GLES Data Kandidatenstudie
  library(foreign)
  ka09 <- read.dta("GLES_ZA5318_Pre1.0.dta")
  ka09$bu_inc <- 1
  ka09$bu_inc[which(ka09$a19_6==0 | ka09$a19_6>90)] <- 0
  table(ka09$a12)
  table(ka09$a12, ka09$bu_inc)
        
        
        
        
        
        
        
        
        
        
        
        
        
    

  